package com.zhava.crawler.domain.model;

import com.zhava.crawler.domain.enums.OutputFormatEnum;
import lombok.Data;
import lombok.NoArgsConstructor;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * Domain entity for a paginated crawl.
 *
 * <p>Holds both the input parameters of a crawl and the result fields that are
 * filled in afterwards. Lombok's {@code @Data} generates getters, setters,
 * {@code equals}, {@code hashCode} and {@code toString} for every field, and
 * {@code @NoArgsConstructor} generates the public no-argument constructor.
 *
 * <p>All fields are boxed/reference types, so any field without an explicit
 * initializer is {@code null} on a freshly constructed instance.
 *
 * @author zhaxiang
 */
@Data
@NoArgsConstructor
public class PaginatedCrawler {

    // ---- Crawl parameters ----

    /** URL to crawl. NOTE(review): presumably the base URL the page parameter is appended to — confirm against the crawler service. */
    private String url;
    /** Name of the page parameter (presumably the query-string key carrying the page number — confirm). */
    private String pageParameterName;
    /** First page of the range to crawl. NOTE(review): inclusiveness is not defined in this class. */
    private Integer startPage;
    /** Last page of the range to crawl. NOTE(review): inclusiveness is not defined in this class. */
    private Integer endPage;
    /** Request headers, keyed by header name. */
    private Map<String, String> headers;
    /** Connect timeout. NOTE(review): unit is not stated here (likely milliseconds) — confirm. */
    private Integer connectTimeout;
    /** Read timeout. NOTE(review): unit is not stated here (likely milliseconds) — confirm. */
    private Integer readTimeout;
    /** CSS selector (presumably used to pick the elements to extract from each page — confirm). */
    private String cssSelector;
    /** Requested output format. */
    private OutputFormatEnum outputFormat;
    /** File-export option. NOTE(review): stored as an Integer; the meaning of its values is not visible in this class — confirm with callers. */
    private Integer exportFile;

    // ---- Crawl results ----

    /** Status code of the crawl (presumably an HTTP status — confirm). */
    private Integer statusCode;
    /** Number of pages collected; starts at 0. */
    private Integer pagesCollected = 0;
    /** Per-page results; starts as an empty, mutable list. */
    private List<PageResult> pageResults = new ArrayList<>();
    /** Total time spent crawling. NOTE(review): unit is not stated here (likely milliseconds) — confirm. */
    private Long totalCrawlTime;
    /** Whether a file was exported; starts as {@code false}. */
    private Boolean fileExported = false;
    /** Path of the exported file, if any. */
    private String exportedFilePath;
    /** Error message for the crawl, if any. */
    private String errorMessage;

    /**
     * Crawl result for a single page.
     *
     * <p>Lombok's {@code @Data} generates the accessors, {@code equals},
     * {@code hashCode} and {@code toString}; every field is {@code null}
     * until set.
     */
    @Data
    public static class PageResult {
        /** Page number this result belongs to. */
        private Integer pageNumber;
        /** URL of this page. */
        private String pageUrl;
        /** Data extracted from the page. NOTE(review): typed as Object; the concrete shape is decided elsewhere — confirm. */
        private Object extractedData;
        /** Status code for this page (presumably an HTTP status — confirm). */
        private Integer statusCode;
        /** Time spent crawling this page. NOTE(review): unit is not stated here — confirm. */
        private Long crawlTime;
        /** Error message for this page, if any. */
        private String errorMessage;
    }
} 